import scrapy

# Crawl data from the entire site
from xiaohua.items import XiaohuaItem
#  Instead of adding every page URL to start_urls,
#  send follow-up requests manually from parse().
class XiaohuasSpider(scrapy.Spider):
    """Spider for listing pages on www.521609.com/ziliao/.

    Yields one ``XiaohuaItem`` (``img_name``, ``img_pic``) per entry on each
    listing page, then manually requests the next page, recursing through
    :meth:`parse` until page 136.
    """
    name = 'xiaohuas'
    # allowed_domains = ['www.xxx.com']
    start_urls = ['http://www.521609.com/ziliao/']
    # Immutable URL template for listing pages 2..136; %d is the page number.
    url = "http://www.521609.com/ziliao/index_%d.html"
    page_num = 2

    def parse(self, response):
        """Extract name/picture pairs from one listing page and queue the next.

        :param response: the listing-page response being parsed
        :return: generator of ``XiaohuaItem`` and follow-up ``scrapy.Request``
        """
        li_list = response.xpath('/html/body/div[4]/div[2]/ul//li')
        for li in li_list:
            img_name = li.xpath("./a[2]/h3/text()").extract_first()
            img_pic = li.xpath("./a[1]/span/img/@src").extract_first()
            # extract_first() returns None when the node is missing; skip
            # such entries instead of crashing on a None concatenation.
            if img_name is None or img_pic is None:
                continue
            # urljoin handles both absolute and root-relative src values;
            # the previous string concatenation produced a double slash for
            # srcs that already begin with "/".
            img_pic = response.urljoin(img_pic)
            item = XiaohuaItem()
            item["img_name"] = img_name
            item['img_pic'] = img_pic
            yield item
        # Manual request sending: recurse into parse() while pages remain.
        if self.page_num <= 136:
            new_url = self.url % self.page_num  # redundant format() removed
            self.page_num += 1
            yield scrapy.Request(url=new_url, callback=self.parse)

